## Replication code for 
## Hainmueller, Jens and Hazlett, Chad. 2013. 
## Kernel Regularized Least Squares: Reducing Misspecification Bias with a Flexible and Interpretable Machine Learning Approach
## For questions email: Jens Hainmueller (jhainm@mit.edu)

rm(list=ls())

# KRLS package from Cran
# install.packages("KRLS")
library(KRLS)

########################################
## Figure 1: Random Samples of Functions 
N=8
D=1
x_index=seq(0,1,.01)
K = 6 
s2=.1
#pdf("figure1.pdf",width=7,height=9)
par(mfrow=c(3,2),mar=c(3,3,0,0), oma=c(0,0,.25,0.25))
for(P in 1:K){
    X=runif(N)
    c=rnorm(N)
  store <- list()
  for(j in 1:N){
   store[[j]] <- c[j]*exp(-((x_index-X[j])^2/s2))
  }
  store <- matrix(unlist(store),ncol=length(x_index),byrow=T)
    
    if( P==1 | P ==3 ){
      matplot(t(store),x=x_index,type="l",xlim=c(0,1),lty="dotted",col="black", xlab="",ylab="",
          ylim=c(-4,4),lwd=2,xaxt="n")
  title(ylab="f(x)",line=2)
 }
    if( P==2 | P ==4 ){
      matplot(t(store),x=x_index,type="l",xlim=c(0,1),lty="dotted",col="black", xlab="",ylab="",
              ylim=c(-4,4),lwd=2,xaxt="n",yaxt="n")
    }
    if( P==5  ){
      matplot(t(store),x=x_index,type="l",xlim=c(0,1),lty="dotted",col="black", xlab="x",ylab="f(x)",
              ylim=c(-4,4),lwd=2)
      title(ylab="f(x)",line=2)
      title(xlab="x",line=2)
      legend("bottomleft",legend=c("Gaussians for x_i","Superposition"),
             lty=c("dotted","solid"),lwd=c(1,2),cex=.85,bg="white")  
    }     
    if( P==6  ){
      matplot(t(store),x=x_index,type="l",xlim=c(0,1),lty="dotted",col="black", xlab="x",ylab="",
              ylim=c(-4,4),lwd=2,yaxt="n")
      title(xlab="x",line=2)
    }         
    points(y=rep(0,N),x=X,col="black",pch=19,cex=1.2)
    lines(y=apply(store,2,sum),x=x_index,lwd=3)   
}
#dev.off()


############################################################################
## Figure 2: KRLS Compares Well to OLS with Linear Data-generating Processes

# leverage point
rm(list=ls())
nn <- seq(20,600,20)
sims <- 100
store <- storelm <- list()
for(i in 1:length(nn)){
  n     <- nn[i]
  temp <- templm <-matrix(NA,sims,1)
  for(j in 1:sims){
    X         <- runif(n,0,1)
    y         <- .5*X + rnorm(n,0,.3)
    X[1]      <- 5  # contaminated point
    y[1]      <- -5 
    temp[j]   <- krls(X=X,y=y,print.level=0)$avgderivatives
    templm[j] <- coef(lm(y~X))[2]
  }
  cat("n is",n,"\n")
  store[[i]]   <- temp
  storelm[[i]] <- templm
}

###pdf("figure2a.pdf",width=7,height=6)
plot(x=nn,y=unlist(lapply(store,mean,na.rm=T)),
     pch=19,ylim=c(-1.2,.8),xaxs="r",
     ylab=c("estimate of average derivative"),xlab=c("N"),type="p")
points(x=nn,y=unlist(lapply(storelm,mean,na.rm=T)))
abline(h=.5,col="black",lty=1)
legend("bottomright",legend=c("True average dy/dx=.5","OLS estimates","KRLS estimates"),
       lty=c(1,NA,NA),pch=c(NA,21,19),col=c("black","black","black"))
###dev.off()

# efficiency comparison
rm(list=ls())
nn <- seq(20,600,20)

sims <- 100
store <- storelm <- list()
for(i in 1:length(nn)){
  n     <- nn[i]
  temp <- templm <-matrix(NA,sims,1)
  for(j in 1:sims){
    X <- rnorm(n)
    y <- 2*X + rnorm(n)    
    temp[j]  <- sqrt(krls(X=X,y=y,print.level=0)$var.avgderivatives)
    templm[j] <- summary(lm(y~X))$coeff[2,2]
  }
  cat("n is",n,"\n")
  store[[i]]   <- temp
  storelm[[i]] <- templm
}

###pdf("figure2b.pdf",width=7,height=6)
plot(x=nn,y=unlist(lapply(store,mean,na.rm=T)),pch=19,#ylim=c(-1.2,.8)
     ,ylab=c("standard error"),xaxs="r",
     xlab=c("N"),type="l",lty=2,lwd=2)
lines(x=nn,y=unlist(lapply(storelm,mean,na.rm=T)),lwd=2)
legend("topright",legend=c("KRLS: SE(E[dy/dx])","OLS: SE(beta)"),
       lty=c(2,1),col=c("black","black"),lwd=2)
###dev.off()



##################################################################
## Figure 3: KRLS With High-Frequency and Discontinuous Functions

# High-Frequency
rm(list=ls())
nn <- c(40,400)
sims <- 100
store <- storeX <- list()
for(i in 1:length(nn)){
  n     <- nn[i]
  X     <- runif(n)
  temp  <- matrix(NA,ncol=sims,nrow=n)
  for(j in 1:sims){
    y         <- .2*sin(12*pi*X)+sin(2*pi*X) + rnorm(n,0,.2)
    temp[,j]  <- krls(X=X,y=y,derivative=FALSE,vcov=FALSE,print.level=0)$fitted
  }
  store[[i]]  <- temp
  storeX[[i]] <- X
}
X <- seq(0,1,.01)
Y <- .2*sin(12*pi*X)+sin(2*pi*X)
###pdf("figure3a.pdf",width=7,height=6)
plot(y=Y,x=X,col="black",type="l",lwd=2,xlab="x",ylab="y")
for(i in 1:length(nn)){
  dd <- data.frame(x=storeX[[i]],y=apply(store[[i]],1,mean))
  dd <- dd[order(dd$x),]
  lines(x=dd$x,y=dd$y,col=c("black","black")[i],lty=1+i,lwd=2)
}
legend("topright",legend=c("Target function","KRLS estimate, N=40","KRLS estimate, N=400"),
       lty=c(1:3),col=c("black","black","black"))
###dev.off()

## Discontinuous Functions
rm(list=ls())
nn <- c(40,400)
sims <- 100
store <- storeols <- storeX <- list()
for(i in 1:length(nn)){
  n     <- nn[i]
  X     <- runif(n)
  temp<- tempols <- matrix(NA,ncol=sims,nrow=n)
  for(j in 1:sims){
    y         <- .5*(X>.5) + rnorm(n,0,.2) 
    temp[,j]  <- krls(X=X,y=y,derivative=FALSE,vcov=FALSE,print.level=0)$fitted
    tempols[,j] <- lm(y~X)$fitted
  }
  store[[i]]    <- temp
  storeols[[i]] <- tempols
  storeX[[i]]   <- X
}
X <- seq(0,1,.001)
Y <- .5*(X>.5)
###pdf("figure3b.pdf",width=7,height=6)
plot(y=Y,x=X,col="black",type="l",lwd=2,ylim=c(-.2,.8),xlab="x",ylab="y")
for(i in 1:length(nn)){
  dd <- data.frame(x=storeX[[i]],y=apply(store[[i]],1,mean))
  dd <- dd[order(dd$x),]
  lines(x=dd$x,y=dd$y,col=c("black","black")[i],lty=1+i,lwd=2)
  
  dd <- data.frame(x=storeX[[i]],y=apply(storeols[[i]],1,mean))
  dd <- dd[order(dd$x),]
  lines(x=dd$x,y=dd$y,col=c(grey(.75),grey(.75))[i],lty=1+i,lwd=2)
}
legend("bottomright",legend=c("Target function","KRLS estimate, N=40","KRLS estimate, N=400",
                              "OLS estimate, N=40","OLS estimate, N=400"),
       lty=c(1:3),col=c("black","black","black",grey(.75),grey(.75)))
###dev.off()

##################################################################
## Figure 4: KRLS Learns Interactions from the Data

# Simple Interaction
rm(list=ls())
ntemp <- 10000
sigma <- .5
X <- replicate(2,sample(0:1,ntemp,replace=T))
ytrue <- as.numeric(xor(X[,1],X[,2]))
y      <- ytrue+rnorm(ntemp,0,sigma)
R2true <- 1-var(y-ytrue)/var(y)

NN                   <- seq(10,300,10)
ntest                <- 1000
storefinal           <- matrix(NA,length(NN),2)
colnames(storefinal) <- c("R2krls","R2ols")
sims <- 100
for(i in 1:length(NN)){
  n <- NN[i]
  store <- matrix(NA,sims,2)
  colnames(store) <- c("R2krls","R2ols")
  cat("n is",n,"\n")
  for(k in 1:sims){
    # draw data
    X     <- replicate(2,sample(0:1,n,replace=T))
    ytrue <- as.numeric(xor(X[,1],X[,2]))
    y     <- ytrue+rnorm(n,0,sigma)
    # KRLS and OLS
    kout  <- krls(y=y,X=X,derivative=FALSE,vcov=FALSE,print.level=0)
    lmout <- lm(y~X)
    # test data
    Xtest <- replicate(2,sample(0:1,ntest,replace=T))
    ytest <- xor(Xtest[,1],Xtest[,2])+rnorm(ntest,0,sigma)
    # predictions
    pkrls <- predict(kout,newdata=Xtest)$fit
    pols   <- cbind(1,Xtest)%*%coef(lmout)
    # R^2s
    store[k,"R2krls"] <- 1-(var(pkrls-ytest)/var(ytest))
    store[k,"R2ols"]  <- 1-(var(pols-ytest)/var(ytest)) 
  }
  storefinal[i,] <- apply(store,2,mean,na.rm=T)
}  

# plot
###pdf("figure4a.pdf",width=7,height=6)
matplot(storefinal[,1:2],x=NN,type="b",lwd=2,ylim=c(-.2,.9),lty=c(1,2),col=c("black",grey(.3)),pch=c(19,21),
        ylab="Out of Sample R^2 (1000 Test Points)",xlab="N",main="Simple Interaction")
abline(h=R2true,col="black",lty="solid",lwd=2)
lines(storefinal[,1],x=NN,type="b",lwd=2,lty=c(1),col=c("black"),pch=c(19))
legend("topright",legend=c("KRLS","OLS","True R^2"),lty=c(1,2,1),pch=c(19,21,NA),col=c("black",grey(.3),"black"),lwd=2)
###dev.off()

## Complex Interaction
rm(list=ls())
ntemp <- 10000
sigma <- .5
X <- replicate(10,rbinom(ntemp, 1, .5))
X[,1:2] <- replicate(2,rbinom(ntemp, 1, .25))
X[,3:4] <- replicate(2,rbinom(ntemp, 1, .75))
ytruefn <- function(X){ X[,1]*X[,2]-2*X[,3]*X[,4]+X[,5]*X[,6]*3*X[,7]-X[,1]*X[,8]+2*X[,8]*X[,9]*X[,10]+X[,10] }
ytrue <- ytruefn(X)
y      <- ytrue+rnorm(ntemp,0,sigma)
R2true <- 1-var(y-ytrue)/var(y)

NN                   <- seq(10,300,10)
storefinal           <- matrix(NA,length(NN),2)
colnames(storefinal) <- c("R2krls","R2ols")
sims <- 100
for(i in 1:length(NN)){
  n <- NN[i]
  cat("n is",n,"\n")
  store <- matrix(NA,sims,2)
  colnames(store) <- c("R2krls","R2ols")
  for(k in 1:sims){
    # draw data
    X <- replicate(10,rbinom(n, 1, .5))
    X[,1:2] <- replicate(2,rbinom(n, 1, .25))
    X[,3:4] <- replicate(2,rbinom(n, 1, .75))
    ytrue <- ytruefn(X)
    y     <- ytrue+rnorm(n,0,sigma)
    # KRLS and OLS
    kout  <- krls(y=y,X=X,print.level=0,vcov=FALSE,deriv=FALSE)
    lmout <- lm(y~X)
    # test data
    ntest <- 1000
    Xtest <- replicate(10,rbinom(ntest, 1, .5))
    Xtest[,1:2] <- replicate(2,rbinom(ntest, 1, .25))
    Xtest[,3:4] <- replicate(2,rbinom(ntest, 1, .75))
    ytest <- ytruefn(Xtest)+rnorm(ntest,0,sigma)
    # predictions
    pkrls <- predict(kout,newdata=Xtest)$fit
    pols   <- cbind(1,Xtest)%*%coef(lmout)
    # R^2s
    store[k,"R2krls"] <- 1-(var(pkrls-ytest)/var(ytest))
    store[k,"R2ols"]  <- 1-(var(pols-ytest)/var(ytest))
  }
  storefinal[i,] <- apply(store,2,mean,na.rm=T)
}  

###pdf("figure4b.pdf",width=7,height=6)
matplot(storefinal[,1:2],x=NN,type="b",lwd=2,ylim=c(-.2,.9),lty=c(1,2),col=c("black",grey(.3)),pch=c(19,21),
        ylab="Out of Sample R^2 (1000 Test Points)",xlab="N",main="Complex Interaction")
abline(h=R2true,col="black",lty="solid",lwd=2)
lines(storefinal[,1],x=NN,type="b",lwd=2,lty=c(1),col=c("black"),pch=c(19))
legend("bottomright",legend=c("KRLS","OLS","True R^2"),lty=c(1,2,1),pch=c(19,21,NA),col=c("black",grey(.3),"black"),lwd=2)
###dev.off()


##################################################################
## Table 1: Comparing KRLS to OLS with Multiplicative Interactions
rm(list=ls())

sigma <- 2
X1 <- seq(0,2,length.out=250)
n <- length(X1)
X2 <- 2*X1+rnorm(n,0,1)
cat(cor(X1,X2),"\n\n")

ytruefn      <- function(x){5*(X1)^2}
ytrue=ytruefn(X1)
y=ytrue+rnorm(n,0,sigma)

sims <- 25000
storelm   <- matrix(NA,sims,4)
storek    <- matrix(NA,sims,2)
storekx1  <- storekx2 <- matrix(NA,sims,n)

for(i in 1:sims){
  y     <- ytrue+rnorm(n,0,sigma)

  lmout         <- lm(y~X1+X2+I(X1*X2))
  storelm[i,]   <- summary(lmout)$coeff[,1]
  
  kout         <- krls(y=y,X=cbind(X1,X2),derivative=T,print.level=0)
  storek[i,]   <- kout$avgderivatives
  storekx1[i,] <- kout$derivatives[,1]
  storekx2[i,] <- kout$derivatives[,2]
}

tab           <- matrix(NA,4*2,ncol=2)
colnames(tab) <- c("OLS","KRLS")
rownames(tab) <- c("const","","x1","","x2","","x1x2","")

coeffsols <-  apply(storelm,2,mean)
coeffsk   <-  apply(storek,2,mean)
sdols<-  apply(storelm,2,sd)
sdk  <-  apply(storek,2,sd)

tab[c(1,3,5,7),1] <- coeffsols
tab[c(2,4,6,8),1] <- sdols
tab[c(3,5),2] <- coeffsk
tab[c(4,6),2] <- sdk

deriv1   <- quantile(apply(storekx1,2,mean),probs=c(.25,.5,.75))
derivsd1 <- apply(apply(storekx1,1,quantile,probs=c(.25,.5,.75)),1,sd)
deriv2   <- quantile(apply(storekx2,2,mean),probs=c(.25,.5,.75))
derivsd2 <- apply(apply(storekx2,1,quantile,probs=c(.25,.5,.75)),1,sd)
deriv <- rbind(NA,NA,deriv1,derivsd1,deriv2,derivsd2,NA,NA)

table1 <- round(cbind(tab,deriv),2)
###write.csv(tab,file="tab.csv")


#############################################################################################
# Table 2: KRLS Captures Complex Interactions and Non-additivity
# Figure A.4, A.5, A.6
rm(list=ls())
library(mgcv)

# 3 target functions
#example <- 1
R2list <- list()
for(k in 1:3){
  example <- k
  if(example==1){ # Three Hills, Three Valleys
    f   <- function(x1,x2){ sin(x1)*cos(x2)}
    x1 <- x2 <-seq(0,2*pi,.2)
  }
  if(example==2){ # One Hill, One Valley
    f   <- function(x2,x1){exp(-5*(1-x1)^2+(1-x2)^2)-exp(-5*(1-x2)^2+(x1)^2)}
    x1 <- x2 <-seq(0,1,.025)
  }
  if(example==3){ # Two Hills, Two Valley
    f   <- function(x2,x1){exp(-5*(1-x1)^2+(x2)^2)+exp(-5*(x1)^2+(1-x2)^2)}
    x1 <- x2 <-seq(0,1,.025)
  }
  
  # prediction function for plotting
  ff  <- function(x1i,x2i,out){predict(object=out,newdata=data.frame(x1i,x2i))$fit}
  ff2 <- function(x1i,x2i,out){matrix(predict(object=out,newdata=data.frame(xx1=x1i,xx2=x2i)))}
  
  sims    <- 100
  n       <- 200
  ntest   <- 100
  sigma   <- .5
  sk      <- sgam <- sols <- array(NA,c(length(x1),length(x2),sims))
  R2store <- matrix(NA,sims,7)
  colnames(R2store) <- c("krlsin","krlsout","olsin","olsout","gamin","gamout","trueout")
  
  for(i in 1:sims){
    # set up data
    if(example==1){              X     <- cbind(runif(n,0,2*pi),runif(n,0,2*pi))}
    if(example==2 | example==3){ X     <- cbind(runif(n,0,1),runif(n,0,1))}
    y     <- f(X[,1],X[,2]) + rnorm(n,0,sigma)
    # fit models
    kout    <- krls(y=y,X=X,vcov=FALSE,deriv=FALSE,print.level=0)
    xx1 <- X[,1]
    xx2 <- X[,2]
    olsout  <- lm(y~xx1+xx2)
    gamout  <- gam(y~s(xx1)+s(xx2))
    # R2s
    R2store[i,"krlsin"] <- kout$R2
    R2store[i,"olsin"]  <- summary(olsout)$r.squared
    R2store[i,"gamin"]  <- 1-(var(gamout$fitted.values-y)/var(y))
    # predicted values for plotting
    sk[,,i]     <- outer(x1,x2,ff ,out=kout)
    sols[,,i]   <- outer(x1,x2,ff2,out=olsout)
    sgam[,,i]   <- outer(x1,x2,ff2,out=gamout)
    # test data for out of sample R2
    if(example==1){             Xtest     <- cbind(runif(n,0,2*pi),runif(n,0,2*pi))}
    if(example==2 | example==3){Xtest     <- cbind(runif(n,0,1),runif(n,0,1))}
    colnames(Xtest) <- c("xx1","xx2")
    ytest     <- f(Xtest[,1],Xtest[,2]) + rnorm(n,0,sigma)
    pkrls     <- predict(kout  ,newdata=Xtest)$fit
    pols      <- predict(olsout,newdata=data.frame(Xtest))
    pgam      <- predict(gamout,newdata=data.frame(Xtest))
    R2store[i,"krlsout"]   <- 1-(var(ytest-pkrls)/var(ytest))
    R2store[i,"olsout"]    <- 1-(var(ytest-pols) /var(ytest))
    R2store[i,"gamout"]    <- 1-(var(ytest-pgam) /var(ytest))
    R2store[i,"trueout"]   <- 1-(var(ytest-f(Xtest[,1],Xtest[,2]))/var(ytest))
    cat("i is",i,"\n")
  }
  pk          <- apply(sk  ,1:2,mean)
  pols        <- apply(sols,1:2,mean)
  pgam        <- apply(sgam,1:2,mean)
  R2list[[k]] <- apply(R2store,2,mean)
  
  # plot  results
  #jet.colors <- colorRampPalette( c("yellow", "red") ) 
  jet.colors <- colorRampPalette( c("black", "lightgrey") ) 
  nbcol <- 200
  color <- jet.colors(nbcol)
  ###pdf(paste("figure",c("A4","A5","A6")[k],".pdf",sep=""),width=8,height=10)
  par(mfrow=c(2,2))
  # true 
  z   <-outer(x1,x1,f)
  nrz <- nrow(z)
  ncz <- ncol(z)
  zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
  facetcol <- cut(zfacet, nbcol)
  if(example==1){persp(x1, x2, z,zlab="y",theta=315,phi=30,main="True f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==2){persp(x1, x2, z,zlab="y",theta=325,phi=35,main="True f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==3){persp(x1, x2, z,zlab="y",theta=315 ,phi=30,main="True f(x1,x2)",ticktype = "simple",col=color[facetcol],cex=.8)}
  
  # KRLS 
  z   <- pk
  zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
  facetcol <- cut(zfacet, nbcol)
  if(example==1){persp(x1, x2, z,zlab="y",theta=315,phi=30,main="KRLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==2){persp(x1, x2, z,zlab="y",theta=325,phi=35,main="KRLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==3){persp(x1, x2, z,zlab="y",theta=315, phi=30,main="KRLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  
  # OLS 
  z   <- pols
  zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
  facetcol <- cut(zfacet, nbcol)
  if(example==1){persp(x1, x2, z,zlab="y",theta=315,phi=30,main="OLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==2){persp(x1, x2, z,zlab="y",theta=325,phi=35,main="OLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==3){persp(x1, x2, z,zlab="y",theta=315, phi=30,main="OLS fit f(x1,x2)",col=color[facetcol],cex=.8)}
  
  # GAM 
  z   <- pgam
  zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
  facetcol <- cut(zfacet, nbcol)
  if(example==1){persp(x1, x2, z,zlab="y",theta=315,phi=30,main="GAM fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==2){persp(x1, x2, z,zlab="y",theta=325,phi=35,main="GAM fit f(x1,x2)",col=color[facetcol],cex=.8)}
  if(example==3){persp(x1, x2, z,zlab="y",theta=315, phi=30,main="GAM fit f(x1,x2)",col=color[facetcol],cex=.8)}
  ###dev.off()
}

  out <- matrix(unlist(R2list),3,7,byrow=T)
  colnames(out) <- c("krlsin","krlsout","olsin","olsout","gamin","gamout","trueout")
  rownames(out) <- c("Three Hills, Three Valleys", "One Hill, One Valley","Two Hills, Two Valleys")
  table2 <- t(round(out,2))[c(1,3,5,2,4,6),c(2,3,1)]

##########################################
##Table 3: Comparing KRLS to Other Methods
library(KRLS)
library(mgcv)
library(neuralnet)

iters=200
n_in=100  #set to 50, 100, 500 
n_out=100

names=c("KRLS","GAM1","GAM2","LM","NN")
results=matrix(NA,iters,length(names))
colnames(results)=names

for (j in 1:iters){
	print(j)
	
	#Create Data
	x1=runif(n_in)
	x2=runif(n_in)
	X_in=cbind(x1,x2)

	#Functions 2 from Wood(2003) re: thin-plate splines
	y_pure=exp((-(x1-.25)^2-(x2-.25)^2)/.1)+.5*exp((-(x1-.7)^2-(x2-.7)^2)/.07)
	y=y_pure+rnorm(n_in,0,0.25)
	
	trueR2=1-var(y-y_pure)/var(y)
	
	x1_out=runif(n_out)
	x2_out=runif(n_out)
	X_out=cbind(x1_out,x2_out)
	
	y_out_pure=exp((-(x1_out-.25)^2-(x2_out-.25)^2)/.1)+.5*exp((-(x1_out-.7)^2-(x2_out-.7)^2)/.07)
	y_out=y_out_pure+rnorm(n_out,0,0.25)

	#KRLS
	krls_out=krls(y,X=cbind(x1,x2),vcov=FALSE,deriv=FALSE,print.level=0)
	krls_predict=predict(krls_out,cbind(x1_out,x2_out))
	
	#GAM1
	gam1_out=gam(y~s(x1)+s(x2))
	gam1_predict=predict(gam1_out,data.frame(x1=x1_out,x2=x2_out))
	
	#GAM2
	gam2_out=gam(y~s(x1,x2,bs="tp"))
	gam2_predict=predict(gam2_out,data.frame(x1=x1_out,x2=x2_out))
	
	#LM
	lm_out=lm(y~x1+x1^2+x2+x2^2+x1*x2)
	lm_predict=predict(lm_out,data.frame(x1=x1_out,x2=x2_out))
	
	#Neural Net
	xdata=data.frame(y,x1,x2)
	nnout=neuralnet(y~x1+x2,xdata,hidden=c(5),threshold=0.01)
	nn_predict=compute(nnout,data.frame(x1_out,x2_out))$net.result
	
	#Load into results
	results[j,"KRLS"]=sqrt((1/n_out)*t(y_out-krls_predict$fit)%*%(y_out-krls_predict$fit))
	results[j,"GAM1"]=sqrt((1/n_out)*t(y_out-gam1_predict)%*%(y_out-gam1_predict))
	results[j,"GAM2"]=sqrt((1/n_out)*t(y_out-gam2_predict)%*%(y_out-gam2_predict))
	results[j,"LM"]=sqrt((1/n_out)*t(y_out-lm_predict)%*%(y_out-lm_predict))
	results[j,"NN"]=sqrt((1/n_out)*t(y_out-nn_predict)%*%(y_out-nn_predict))
}

#Results of interest: Table 3
round(apply(results,2,mean),3)

#Figure to display density of RMSE over iterations (not in paper)
plot(density(results[,1]),main="",ylim=c(-.1,30),xlim=c(.15,.5),xlab="RMSE (per iter)")
lines(density(results[,2]), lty=2,col=2)
lines(density(results[,3]), lty=2,col=3)
lines(density(results[,4]), lty=2,col=4)
lines(density(results[,5]), lty=2,col=5)
legend("topright", colnames(results), lty=c(1,2,2,2,2),col=c(1,2,3,4,5))
title(paste("RMSE of out-of-sample forecasts, iter=200, n=",n_in, sep=""),cex=.7)


#########################################################
## Table 4: Predictors of Genocide Onset: OLS versus KRLS
rm(list=ls())
library(pROC)
library(lattice)

load("Harff2003.RData")
colnames(d)=c("Genonset","PriorUpheaval","PriorGen","IdeologicalChar",
							"AUTOC","EthnicChar","TradeOpen")

out <- krls(y=d[,1],X=d[,-1],derivative = TRUE,binary=TRUE,print.level=0)
summary(out)

olsout <- lm(as.formula(paste(names(d)[1],"~",paste(names(d)[-1],collapse="+"),sep="")),data=d)
summary(olsout)

# create table
olstab <-  c(  summary(olsout)$coefficients[,1],
               summary(olsout)$coefficients[,2])[c(1,8,2,9,3,10,4,11,5,12,6,13,7,14)]
krlstab<- c(NA,NA,c(out$avgderivatives[1,],
                    sqrt(out$var.avgderivatives[1,]))[c(1,7,2,8,3,9,4,10,5,11,6,12)])

krlsq  <- rbind(summary(out)$qcoefficients,rep(NA,3))[c(7,7,1,7,2,7,3,7,4,7,5,7,6,7),]
table4 <- round(cbind(cbind(olstab,krlstab),krlsq),3)

###pdf("Figure5.pdf",width=10,height=8)
plot(out,which=1,
     scales=list(alternating = FALSE,
                 relation="same",yaxt="i"),
     xlab="marginal effect",col=grey(.4),
     par.settings  = list(
       strip.background = list(col = grey(.95) )),
     layout=c(3,2),type="percent")
###dev.off()

# roc test vs logit
rockrls <- roc(d$Genonset,out$fit)
logit <- glm(as.formula(paste(names(d)[1],"~",paste(names(d)[-1],collapse="+"),sep="")),
             data=d,family=binomial(link = "logit"))
roclogit  <- roc(d$Genonset,logit$fitted)
roc.test(rockrls,roclogit)


#Additional Results for Harff Example (Section 6.1): Exploring Marginal Effects
#To quickly see how covariates relate to estimated marginal effects:
#Marginal effect of prior genocide:
summary(lm(out$derivatives[,"PriorGen"]~as.matrix(d[,-1])))
#Marginal effect of ideological character:
summary(lm(out$derivatives[,"IdeologicalChar"]~as.matrix(d[,-1])))
#Marginal effect of ethnic character
summary(lm(out$derivatives[,"EthnicChar"]~as.matrix(d[,-1])))

#To interpret relationships to trade openness, split by first and fourth quartile:
hitrade=d[,"TradeOpen"]>=quantile(d[,"TradeOpen"],.75)
lowtrade=d[,"TradeOpen"]<=quantile(d[,"TradeOpen"],.25)
#See how mean marginal effect of ethnic character relates to trade openness:
mean(out$derivatives[hitrade,"EthnicChar"])
mean(out$derivatives[lowtrade,"EthnicChar"])
#See how mean marginal effect of ideological character relates to trade openness;
mean(out$derivatives[hitrade,"IdeologicalChar"])
mean(out$derivatives[lowtrade,"IdeologicalChar"])

#Similarly, examine how these marginal effects relate to prior genocide: 
mean(out$derivatives[d[,"IdeologicalChar"]==1,"PriorGen"])
mean(out$derivatives[d[,"IdeologicalChar"]==0,"PriorGen"])
mean(out$derivatives[d[,"EthnicChar"]==1,"PriorGen"])
mean(out$derivatives[d[,"EthnicChar"]==0,"PriorGen"])

#And finally check how prior genocide relates to ideological character
mean(out$derivatives[d[,"PriorGen"]==1,"IdeologicalChar"])
mean(out$derivatives[d[,"PriorGen"]==0,"IdeologicalChar"])

### Additional Figures in Appendix
##################################################
## Figure A.1  Fitting a Simple Function with KRLS
X=1:4
y=-2:1
N=4
x_index=seq(-1,6,.01)
s2=1.66667

# kernel matrix
K <- gausskernel(X,sigma=s2)
# get c's
out <- krls(X=X,y=y,sigma=s2,vcov=FALSE,deriv=FALSE,print.level=0)
# get Yhats for x_index
pout <- predict(out,newdata=x_index)

# get gaussians for plot
  store <- storescaled <- list()
  for(j in 1:N){
   store[[j]]       <- exp(-((x_index-X[j])^2/s2)) # unscaled
   storescaled[[j]] <- out$c[j]*exp(-((x_index-X[j])^2/s2)) # scaled
  }
  store       <- matrix(unlist(store),ncol=length(x_index),byrow=T)
  storescaled <- matrix(unlist(storescaled),ncol=length(x_index),byrow=T)

# plot unscaled Gaussians
###pdf("figureA1a.pdf",width=7,height=6)
matplot(t(store),x=x_index,type="l",
          col="black",lty="dotted",
          xlim=c(.5,4.5),ylim=c(-3,3),
         lwd=3,
         xlab="x",ylab="y")
points(y=rep(0,N),x=X,col="black",pch=19,cex=1.2)
legend("bottomright",
       legend=c("Unscaled Gaussians"),
       lty=c("dotted"),
       lwd=c(3),cex=1,bg="white")
###dev.off()

# plot scaled Gaussians
###pdf("figureA1b.pdf",width=7,height=6)
matplot(t(storescaled),x=x_index,type="l",
          col="black",lty="dotted",
          xlim=c(.5,4.5),ylim=c(-3,3),
         lwd=3,
         xlab="x",ylab="y")
lines(pout$fit,x=x_index,lwd=3)
points(y,x=X,pch=19,col="black",cex=2)
legend("bottomright",
       legend=c("Scaled Gaussians","KRLS Fit (Superposition)"),
       lty=c("dotted","solid"),
       lwd=c(3,3),cex=1,bg="white")
###dev.off()

## Figure A.2 Example of High and Low Frequency Functions
x=seq(0,1,.001)
y_low=sin(2*pi*x)
y_high=y_low+.5*sin(16*pi*x)

###pdf("figureA2.pdf",width=7,height=6)
plot(x,y_low,xlim=c(0,1),ylim=c(-2,2),type="l",lwd=2,xlab="x",ylab="y")
lines(x,y_high,type="l",lwd=2,lty=2)
###dev.off()


###################################################################
## Figure A.3: KRLS Fits Non-Linear Functions and their Derivatives

nn <- c(20,50,100)
sims <- 500
yylim = c(-600,600)

store <- storederiv <- storelm <- storelmderiv <- storeX <- list()
for(i in 1:length(nn)){
n     <- nn[i]
X     <- runif(n,-4,4)
y     <- 100 + 3*X^4
dydx  <- 3*4*X^3
x_index <- seq(-4,4,.1)
yplot     <- 100 + 3*x_index^4
dydxplot  <- 3*4*x_index^3

# get fitted values from KRLS and OLS for f(x) and f'(x)
temp <- tempd <- templm <- templmd <-matrix(NA,sims,n)
for(j in 1:sims){
ystar        <- y + rnorm(n,0,1)
out          <- krls(X=X,y=ystar,derivative=TRUE,print.level=0)
outlm        <- lm(ystar~X)
temp[j,]     <- out$fitted
tempd[j,]    <- out$derivatives
templm[j,]   <- outlm$fitted
templmd[j,]  <- rep(coef(outlm)[2],length(X))
}
store[[i]]        <- temp
storederiv[[i]]   <- tempd
storelm[[i]]      <- templm
storelmderiv[[i]] <- templmd
storeX[[i]]       <- X
}

###pdf("figureA3.pdf",width=7,height=9)
# plot average fits and truth
par(mfrow=c(3,2),mar=c(3,3,0,0), oma=c(0,0,.25,0.25))
for(i in 1:length(nn)){
n <- nn[i]
dat <- data.frame(X=storeX[[i]],
                  KRLS =apply(store[[i]],2,mean),
                  KRLSd=apply(storederiv[[i]],2,mean),
                  OLS=apply(storelm[[i]],2,mean),
                  OLSd=apply(storelmderiv[[i]],2,mean)
                    )
dat <- dat[order(dat$X),]

if(i<3){
plot(y=yplot,x=x_index,col="black",ylim=yylim,main="",
     xlab="",ylab="y",type="l",lwd=2,xaxt="n")
lines(y=dydxplot,x=x_index,col="darkgrey",lty="dashed",lwd=2)
points(y=dat$KRLS,x=dat$X,pch=19,col="black")
points(y=dat$KRLSd,x=dat$X,pch=17,col="darkgrey")
title(main=paste("KRLS, N=",n),line=-3)
title(ylab="y",line=2)

plot(y=yplot,x=x_index,col="black",ylim=yylim,main="",yaxt="n",xaxt="n",
     xlab="",ylab="",type="l",lwd=2)
lines(y=dydxplot,x=x_index,col="darkgrey",lty="dashed",lwd=2)
points(y=dat$OLS,x=dat$X,pch=19,col="black")
points(y=dat$OLSd,x=dat$X,pch=17,col="darkgrey")
title(main=paste("OLS, N=",n),line=-3)

} else {

  plot(y=yplot,x=x_index,col="black",ylim=yylim,main="",
       xlab="",ylab="",type="l",lwd=2)
  lines(y=dydxplot,x=x_index,col="darkgrey",lty="dashed",lwd=2)
  points(y=dat$KRLS,x=dat$X,pch=19,col="black")
  points(y=dat$KRLSd,x=dat$X,pch=17,col="darkgrey")
  legend("bottomright",pch=c(NA,19,NA,17),lty=c(1,NA,2,NA),
         legend=c("f(y)=100+3x^4","f(y) fit KRLS","dy/dx=12x^3","dy/dx fit KRLS"),
         col=c("black","black","darkgrey","darkgrey"))
  title(main=paste("KRLS, N=",n),line=-3)
  title(ylab="y",line=2)
  title(xlab="x",line=2) 
  
  plot(y=yplot,x=x_index,col="black",ylim=yylim,main="",
       xlab="",ylab="",type="l",lwd=2,yaxt="n")
  lines(y=dydxplot,x=x_index,col="darkgrey",lty="dashed",lwd=2)
  points(y=dat$OLS,x=dat$X,pch=19,col="black")
  points(y=dat$OLSd,x=dat$X,pch=17,col="darkgrey")
  legend("bottomright",pch=c(NA,19,NA,17),lty=c(1,NA,2,NA),
         legend=c("f(y)=100+3x^4","f(y) fit OLS","dy/dx=12x^3","dy/dx fit OLS"),
         col=c("black","black","darkgrey","darkgrey"))
  title(main=paste("OLS, N=",n),line=-3)
  title(xlab="x",line=2) 
}
}
###dev.off()


###################################################################
## Figure A.4-A.6 Fitting Common Interactions
## See code under Table 2 in main text section, above.

###################################################################
## Additional Empirical Example based on Brambor et al. 2006.
## Data from Golder M. Golder, 2006 (AJPS)
rm(list=ls())
## A.7.   Plot of pointwise partial derivatives
#Load prepared data from Golder (includes pre-processing done according to posted replication file)
load("GolderPrepared.Rdata")
d=Golder
rm(Golder)
covars=c("proximity1", "enpres", "eneg", "logmag")
#Run KRLS model.  To replicated original, use only data where old==1

d_clean=na.omit(d[,c("enep1",covars,"old","country")])
d_clean=d_clean[d_clean$old==1,]
#To replicated original, use only data where old==1
krlsout=krls(y=d_clean[,"enep1"],X=d_clean[,covars],print.level=0)

#pdf("figureA7.pdf",height=5,width=6)
plot(d_clean[,"enpres"],y=krlsout$derivatives[,"proximity1"],col=16,pch=16,
		 xlab="Effective Presidential Candidates",ylab="Marginal Effect of Temporal Proximity")
lines(lwd=2,lowess(f=1,x=d_clean[,"enpres"],y=krlsout$derivatives[,"proximity1"]))
#legend("bottomright",legend=c("pointwise","lowess fit"),lty=c(0,1),pch=c(20,NA),col=c(16,1))
#dev.off()

## A.8 Visualize Regression Estimates on Data Split at 2 Candidates
#For clustering SEs on country
# Based on code by Mahmood Arai, http://people.su.se/~ma/clustering.pdf)
# Modified to return separate summary and the estimates var-covar matrix of the coeficients
cl   <- function(dat,fm, cluster){
	require(sandwich, quietly = TRUE)
	require(lmtest, quietly = TRUE)
	M <- length(unique(cluster))
	N <- length(cluster)
	K <- fm$rank
	dfc <- (M/(M-1))*((N-1)/(N-K))
	uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
	vcovCl=dfc*sandwich(fm, meat=crossprod(uj)/N)
	
	r=list()
	r$results=coeftest(fm, vcovCl) 
	r$vcovCl=vcovCl
return(r)
}

#Run OLS where Presidential Candidates<=2
d_low=d_clean[d_clean$enpres<=2,]
d_hi=d_clean[d_clean$enpres>2,]

lmout_low=lm(enep1~proximity1+enpres+proximity1*enpres+eneg+logmag+logmag*eneg, data=d_low)
lmout_hi=lm(enep1~proximity1+enpres+proximity1*enpres+eneg+logmag+logmag*eneg, data=d_hi)

OLS_low=cl(dat=d_low, fm=lmout_low, cluster=as.factor(d_low$country))
OLS_hi=cl(dat=d_hi, fm=lmout_hi, cluster=as.factor(d_hi$country))

dydproxim_low=OLS_low$results["proximity1","Estimate"]+d_low[,"enpres"]*OLS_low$results["proximity1:enpres","Estimate"]
dydproxim_hi=OLS_hi$results["proximity1","Estimate"]+d_hi[,"enpres"]*OLS_hi$results["proximity1:enpres","Estimate"]

v1=OLS_low$vcovCl["proximity1","proximity1"]
v3=OLS_low$vcovCl["proximity1:enpres","proximity1:enpres"]
cov1_3=OLS_low$vcovCl["proximity1","proximity1:enpres"]

dydproxim_low_sd=sqrt(v1+d_low[,"enpres"]^2*v3+2*cov1_3)
dydproxim_low_ub=dydproxim_low+1.96*dydproxim_low_sd
dydproxim_low_lb=dydproxim_low-1.96*dydproxim_low_sd

#pdf("figureA8a.pdf",width=4,height=5)
plot(sort(d_low[,"enpres"]),dydproxim_low[order(d_low[,"enpres"])],type="l",ylim=c(-6,6),
		 xlab="Presidential Candidates",ylab="Marginal Effect of Proximity" )
points(d_low[,"enpres"],dydproxim_low,pch=20)
lines(sort(d_low[,"enpres"]),dydproxim_low_ub[order(d_low[,"enpres"])],lty=2)
lines(sort(d_low[,"enpres"]),dydproxim_low_lb[order(d_low[,"enpres"])],lty=2)
#dev.off()

v1=OLS_hi$vcovCl["proximity1","proximity1"]
v3=OLS_hi$vcovCl["proximity1:enpres","proximity1:enpres"]
cov1_3=OLS_hi$vcovCl["proximity1","proximity1:enpres"]

dydproxim_hi_sd=sqrt(v1+d_hi[,"enpres"]^2*v3+2*cov1_3)
dydproxim_hi_ub=dydproxim_hi+1.96*dydproxim_hi_sd
dydproxim_hi_lb=dydproxim_hi-1.96*dydproxim_hi_sd

#pdf("figureA8b.pdf",width=4,height=5)
plot(sort(d_hi[,"enpres"]),dydproxim_hi[order(d_hi[,"enpres"])],type="l",ylim=c(-6,6),
		 xlab="Presidential Candidates",ylab="Marginal Effect of Proximity" )
points(d_hi[,"enpres"],dydproxim_hi,pch=20)
lines(sort(d_hi[,"enpres"]),dydproxim_hi_ub[order(d_hi[,"enpres"])],lty=2)
lines(sort(d_hi[,"enpres"]),dydproxim_hi_lb[order(d_hi[,"enpres"])],lty=2)
#dev.off()







